# load necessary packages
library(tidyverse)
library(mosaic)
library(DataComputing)
library(ggplot2)
What factors are common among all Hall of Fame MLB players? Hall of Fame baseball players are obviously a step ahead of everybody else. They are considered great players and have a knack for understanding the game. Becoming a Hall of Fame baseball player is no easy feat. They have advanced stats that show how great they truly are, but what enables a baseball player to be inducted into the Hall of Fame? We are going to dive deep and compare inducted Hall of Fame players with other players who tried to live up to the legendary names. One challenge we faced was that some of the data sets were incomplete. For some players from the older years, certain statistics were not tracked because data was recorded inconsistently during those years. Some stats are now tracked by computers, which were not available in some of the years covered by the data sets. Another problem we faced was the lack of explanation for some of the data. Some players appeared twice for certain years in categories that should not contain repeats. This threw the numbers we tracked off by a few but, luckily, did not skew the results too much. One finding that we considered significant was the number of times that inducted players had a batting average over .500. This is no easy feat, as it means the player got a hit in more than half of his at-bats over the whole year. The most shocking part of this data is how high the numbers got. We made sure that players had played in at least 20 games, and players were still hitting over .650. This shows how excellent these players were at the time and through certain years.
# Load the Hall of Fame voting records into a tibble.
HallOfFame <- read_csv(file = file.path("core", "HallOfFame.csv"))
Parsed with column specification:
cols(
playerID = col_character(),
yearID = col_double(),
votedBy = col_character(),
ballots = col_double(),
needed = col_double(),
votes = col_double(),
inducted = col_character(),
category = col_character(),
needed_note = col_character()
)
# Load the All-Star Game appearance records into a tibble.
AllstarFull <- read_csv(file = file.path("core", "AllstarFull.csv"))
Parsed with column specification:
cols(
playerID = col_character(),
yearID = col_double(),
gameNum = col_double(),
gameID = col_character(),
teamID = col_character(),
lgID = col_character(),
GP = col_double(),
startingPos = col_double()
)
# Load the player salary records into a tibble.
Salaries <- read_csv(file = file.path("core", "Salaries.csv"))
Parsed with column specification:
cols(
yearID = col_double(),
teamID = col_character(),
lgID = col_character(),
playerID = col_character(),
salary = col_double()
)
# Load the batting statistics into a tibble.
#
# With pure type guessing, readr infers IBB, HBP, SH and SF as logical
# (presumably because the leading rows it inspects are empty for them), so
# every later numeric count such as 2 or 5 is reported as a parsing failure
# and read as NA. Declaring these four columns as numeric keeps the recorded
# values; all remaining columns are still guessed as before.
Batting <- read_csv(
  "core/Batting.csv",
  col_types = cols(
    IBB = col_double(),
    HBP = col_double(),
    SH = col_double(),
    SF = col_double()
  )
)
Parsed with column specification:
cols(
.default = col_double(),
playerID = col_character(),
teamID = col_character(),
lgID = col_character(),
IBB = col_logical(),
HBP = col_logical(),
SH = col_logical(),
SF = col_logical()
)
See spec(...) for full column specifications.
87292 parsing failures.
row col expected actual file
1999 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2001 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2020 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2022 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2027 HBP 1/0/T/F/TRUE/FALSE 5 'core/Batting.csv'
.... ... .................. ...... ..................
See problems(...) for more details.
# Preview the first rows, then show every column with its type.
HallOfFame %>% head()
HallOfFame %>% glimpse()
Observations: 4,191
Variables: 9
$ playerID <chr> "cobbty01", "ruthba01", "wagneho01", "mathech01", "johnswa01", "lajoina01", "speaktr01…
$ yearID <dbl> 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 19…
$ votedBy <chr> "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWA…
$ ballots <dbl> 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 7…
$ needed <dbl> 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 5…
$ votes <dbl> 222, 215, 215, 205, 189, 146, 133, 111, 105, 80, 77, 60, 58, 55, 51, 47, 40, 39, 39, 3…
$ inducted <chr> "Y", "Y", "Y", "Y", "Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "…
$ category <chr> "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Playe…
$ needed_note <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# Preview the first rows, then show every column with its type.
Salaries %>% head()
Salaries %>% glimpse()
Observations: 26,428
Variables: 5
$ yearID <dbl> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
$ teamID <chr> "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL"…
$ lgID <chr> "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL",…
$ playerID <chr> "barkele01", "bedrost01", "benedbr01", "campri01", "ceronri01", "chambch01", "dedmoje01",…
$ salary <dbl> 870000, 550000, 545000, 633333, 625000, 800000, 150000, 483333, 772000, 250000, 1500000, …
# Preview the first rows of the batting table.
Batting %>% head()